rm(list =ls())
options(scipen=999)
gc()
packages <-c("tidyverse","estimatr","plm","stargazer",
             "fastDummies","ICCbin","ihs","readstata13","xtable",
             "arm")

new.packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)

lapply(packages, require, character.only = TRUE)
rm(packages, new.packages)

setwd(dirname(dirname(rstudioapi::getSourceEditorContext()$path)))

## load data
data <- read.csv("./Datasets/panel_bare_bones.csv")

## clean data
data$X <- NULL
data$locality_year <- NULL
data$subbotnik_inkind <- NULL
data$locality <- as.character(tolower(data$locality))

controls <- c("locality", "region", "pop", "schools", "shops", "restaurants", "cinemas",
              "Frac48", "coal_sqm", "coal_mines", "minerals", "partition", "priests_continuous", "priests_relocated",
              "pop", "protests_40s", "sabotage_40s","terror_40s", "income_76_capita", 
              "income_77_capita")

## load controls
data <- merge(data, read.csv("./Datasets/main_confounders.csv")[,controls], by = c("locality"), all.x = T)
data$region_numeric <- as.numeric(as.factor(data$region))
rm(controls)
data$priests_continuous <- data$priests_relocated

## add Prussian census controls
data <- merge(data, read.csv("./Datasets/prussia_clean.csv"), by = c("locality"), all.x = T)

###################
### First stage ###
###################

# subset to years with observations:
data_t <- data[which(data$year>1974 & data$year<1980),]

## rearrange controls 

# create wealth index
data_t$shops <- scale(data_t$shops)
data_t$restaurants <- scale(data_t$restaurants)
data_t$cinemas <- scale(data_t$cinemas)
data_t$wealth_index <- rowSums(data_t[,c("shops","restaurants","cinemas")], na.rm = T)
head(data_t[,c("shops", "restaurants", "cinemas", "wealth_index")], 50)
data_t$wealth_index[data_t$wealth_index == "NaN"] = NA  
data_t$wealth_index <- scale(data_t$wealth_index)

# create state capacity index
data_t$state_capacity_index <- scale(data_t$schools)

# create ethnic diversity index
data_t$ethnic_diversity_index <- scale(data_t$Frac48)

# create Russian occupation index
data_t$Russian_occupation_index <- ifelse(data_t$partition=="Russian",1,0)
data_t$Russian_occupation_index <- scale(data_t$Russian_occupation_index)

# scale industrialization
data_t$industrialization_index <- scale(data_t$minerals)

# scale original industrialization
data_t$coal_sqm <- scale(data_t$coal_sqm)
data_t$minerals <- scale(data_t$minerals)
data_t$industrialization_index_o <- rowSums(data_t[,c("coal_sqm","minerals")], na.rm = F)
data_t$industrialization_index_o[data_t$industrialization_index_o == "NaN"] = NA  
data_t$industrialization_index_o <- scale(data_t$industrialization_index_o)

# scale grievance
data_t$grievance <- scale(data_t$coal_sqm)

# scale grievance II
data_t$grievance_ii <- scale(data_t$coal_mines)

# scale protests 40s
data_t$protests_40s <- scale(data_t$protests_40s)

# scale sabotage 40s
data_t$sabotage_40s <- scale(data_t$sabotage_40s)

# scale terror 40s
data_t$terror_40s <- scale(data_t$terror_40s)

# scale incomes
data_t$income_76_capita <- scale(data_t$income_76_capita)
data_t$income_77_capita <- scale(data_t$income_77_capita)
data_t$income <- rowSums(data_t[,c("income_76_capita", "income_77_capita")], na.rm = T)
data_t$income <- scale(data_t$income)

# scale jews
data_t$jew_perc[is.na(data_t$jew_perc)] = mean(data_t$jew_perc, na.rm=TRUE)
data_t$jew_perc <- scale(data_t$jew_perc)


#################################
#### First stage regressions ####
#################################

#### OLS when averaging over years 1975-79

# aggregate data
data_agg <- aggregate(data_t[,c("locality_numeric", "subbotnik_inkind_", "commanders", 
                                "priests_continuous", "pop", "region_numeric", "wealth_index", 
                                "state_capacity_index", "ethnic_diversity_index", "Russian_occupation_index", 
                                "industrialization_index", "industrialization_index_o", "income", "grievance", "grievance_ii", "protests_40s",
                                "sabotage_40s", "terror_40s", "jew_perc")], by=list(Category=data_t$locality_numeric), FUN=mean, na.rm=T)


# regressions
no_ctrl_1 <- lm(commanders ~ priests_continuous, data = data_agg)
o_ctrl_1 <- lm(commanders ~ priests_continuous + wealth_index + state_capacity_index + ethnic_diversity_index + Russian_occupation_index + industrialization_index_o + factor(region_numeric), data = data_agg)
n_ctrl_1 <- lm(commanders ~ priests_continuous + pop + wealth_index + income + state_capacity_index + ethnic_diversity_index + Russian_occupation_index + industrialization_index + grievance + grievance_ii + protests_40s + sabotage_40s + terror_40s + jew_perc + factor(region_numeric), data = data_agg)

#### OLS when averaging over years 1980-86

# subset to years with observations:
data_t_late <- data[which(data$year>1979),]

# average across years
data_agg_late <- aggregate(data_t_late[ ,c("locality_numeric", "region_numeric", "commanders", "priests_continuous")], by=list(Category=data_t_late$locality_numeric), FUN=mean, na.rm=T)

# attach covariates from above
data_agg_late <- merge(data_agg_late, data_agg[,c("locality_numeric", "pop", "wealth_index", 
                                                  "state_capacity_index", "ethnic_diversity_index", "Russian_occupation_index", 
                                                  "industrialization_index", "industrialization_index_o", "income", "grievance", "grievance_ii", "protests_40s",
                                                  "sabotage_40s", "terror_40s", "jew_perc")], by = "locality_numeric", all.x = T)

# regressions
no_ctrl_2 <- lm(commanders ~ priests_continuous, data = data_agg_late)
o_ctrl_2 <- lm(commanders ~ priests_continuous + wealth_index + state_capacity_index + ethnic_diversity_index + Russian_occupation_index + industrialization_index_o + factor(region_numeric), data = data_agg_late)
n_ctrl_2 <- lm(commanders ~ priests_continuous + pop + wealth_index + income + state_capacity_index + ethnic_diversity_index + Russian_occupation_index + industrialization_index + grievance + grievance_ii + protests_40s + sabotage_40s + terror_40s + jew_perc + factor(region_numeric), data = data_agg)

## save as tex
stargazer(no_ctrl_1, o_ctrl_1, n_ctrl_1, no_ctrl_2, o_ctrl_2, n_ctrl_2,
          dep.var.labels = c("Secret police officers"),
          covariate.labels = c("Corrupt priests","Population","Wealth","Income",
                               "State capacity","Ethnic diversity","Russian occupation",
                               "Industrialization (old)","Industrialization (new)",
                               "Grievances (coal)","Grievances (mines)","Protests (40s)",
                               "Sabotage (40s)","Terror (40s)","Jews (1871)"),
          column.labels = c("1975-1979","1980-1986"),
          column.separate = c(3,3),
          type="text",
          style = "qje",
          star.char = c("*", "**", "***"),
          star.cutoffs = c(0.1, 0.05, 0.01),
          omit = c("Constant", "factor"),
          omit.stat = c("rsq", "ser","f"), 
          omit.table.layout = "n",
          add.lines = list(c("Controls", "No", "Yes (old)", "Yes (new)", "No", "Yes (old)", "Yes (new)"),
                           c("Fixed effects", "No", "Yes", "Yes", "No", "Yes", "Yes")),
          out = "./output/TableA11_CORRECTED.tex",
          float=FALSE)

